{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Explain Blackbox Regressors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this notebook we will use the interpret package to explain blackbox regressors using SHAP, Lime, MorrisSensitivity, and PartialDependence.\n",
    "\n",
    "This notebook can be found in our [**_examples folder_**](https://github.com/interpretml/interpret/tree/develop/docs/interpret/python/examples) on GitHub."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# install interpret if not already installed\n",
    "try:\n",
    "    import interpret\n",
    "except ModuleNotFoundError:\n",
    "    !pip install --quiet interpret pandas scikit-learn lime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.datasets import load_diabetes\n",
    "from sklearn.model_selection import train_test_split\n",
    "from interpret import show\n",
    "\n",
    "from interpret import set_visualize_provider\n",
    "from interpret.provider import InlineProvider\n",
    "set_visualize_provider(InlineProvider())\n",
    "\n",
    "X, y = load_diabetes(return_X_y=True, as_frame=True)\n",
    "\n",
    "seed = 42\n",
    "np.random.seed(seed)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h2>Train a blackbox regression system</h2>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.pipeline import Pipeline\n",
    "\n",
    "#Blackbox system can include preprocessing, not just a regressor!\n",
    "pca = PCA()\n",
    "rf = RandomForestRegressor(random_state=seed)\n",
    "\n",
    "blackbox_model = Pipeline([('pca', pca), ('rf', rf)])\n",
    "blackbox_model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h2>Show blackbox model performance</h2>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.perf import RegressionPerf\n",
    "\n",
    "blackbox_perf = RegressionPerf(blackbox_model).explain_perf(X_test, y_test, name='Blackbox')\n",
    "show(blackbox_perf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h2>Local Explanations: How an individual prediction was made</h2>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.blackbox import LimeTabular\n",
    "\n",
    "#Blackbox explainers need a predict function, and optionally a dataset\n",
    "lime = LimeTabular(blackbox_model, X_train, random_state=1)\n",
    "\n",
    "#Pick the instances to explain, optionally pass in labels if you have them\n",
    "lime_local = lime.explain_local(X_test[:5], y_test[:5], name='LIME')\n",
    "\n",
    "show(lime_local, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.blackbox import ShapKernel\n",
    "\n",
    "background_val = pd.DataFrame(np.median(X_train, axis=0).reshape(1, -1), columns=X.columns)\n",
    "shap = ShapKernel(blackbox_model, background_val)\n",
    "shap_local = shap.explain_local(X_test[:5], y_test[:5], name='SHAP')\n",
    "show(shap_local, 0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h2>Global Explanations: How the model behaves overall</h2>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.blackbox import MorrisSensitivity\n",
    "\n",
    "sensitivity = MorrisSensitivity(blackbox_model, X_train)\n",
    "sensitivity_global = sensitivity.explain_global(name=\"Global Sensitivity\")\n",
    "\n",
    "show(sensitivity_global)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from interpret.blackbox import PartialDependence\n",
    "\n",
    "pdp = PartialDependence(blackbox_model, X_train)\n",
    "pdp_global = pdp.explain_global(name='Partial Dependence')\n",
    "\n",
    "show(pdp_global, 0)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
